The data is simulated via the following code. Histogram of each variable is shown below. Each of x2 : x7 has its own data structure while x1, x8:x10 are randomly generated normal variables.
set.seed(1234)
x1 <- rnorm(1000, 0, 1)
x2 <- sample(c(rnorm(500, -3, 1), rnorm(500, 3, 1)), size = 1000)
x3 <- sample(c(rep(-1, 500), rep(1, 500)), size = 1000)
x4 <- sample(c(rnorm(250, -3, 1), rnorm(750, 3, 1)), size = 1000)
x5 <- sample(c(rnorm(330, -5, 1), rnorm(340, 0, 1), rnorm(330, 5, 1)), size = 1000)
x6 <- sample(c(rnorm(450, -5, 1), rnorm(100, 0, 1), rnorm(450, 5, 1)), size = 1000)
x7 <- sample(c(rnorm(500, -5, 1), rnorm(500, 5, 1)), size = 1000)
x8 <- rnorm(1000, 0, 1)
x9 <- rnorm(1000, 0, 1)
x10 <- rnorm(1000, 0, 1)
data_mult <- tibble::tibble(x1 = x1, x2 = x2, x3 = x3,
x4 = x4, x5 = x5, x6 = x6, x7 = x7,
x8 = x8, x9 = x9, x10 = x10) %>% map_df(scale)
origin_dt_bi <- data_mult %>%
dplyr::select(-x3) %>%
gather(names, values) %>%
mutate(names = as_factor(names))
origin_dt_bi %>%
ggplot(aes(x = values)) +
geom_histogram(binwidth = 0.3) +
geom_density(aes(y = 0.3 * ..count..)) +
facet_wrap(vars(names), ncol = 3)
search_geodesic()Searching method Search_geodesic() is performed on x1, x2, x8:x10 with an additional variable from x3: x7. The following few screenshots are taken at the end of each trial and projection pursuit are able to find the data structure in all the caese.
run_geodesic <- function(var){
set.seed(12345)
mult_geodesic <- data_mult[,c(1,2,var,8,9,10)] %>%
animate_xy(tour_path = guided_tour(holes(), d = 2,
search_f = search_geodesic_latest),
rescale = FALSE)
return(mult_geodesic)
}
Similar to 1-tour_optim_documentation.html, PCA plot and index_val tracing plot are provided. With two columns in each projection basis, PCA is done separately for each columns. The following demo uses variable x1, x2, x7, x8, x9, x10.
proj <- mult_geodesic %>%
filter(info == "interpolation") %>% pull(basis) %>% tail(1)
proj[[1]]
## [,1] [,2]
## [1,] -0.001738144 0.03945460
## [2,] 0.496024430 -0.86384416
## [3,] 0.862833666 0.49212418
## [4,] -0.028429470 -0.05467946
## [5,] 0.077831028 0.06522615
## [6,] -0.051077434 0.05278356
projected <- as.matrix(data_mult[,c(1,2,7,8,9,10)]) %*% proj[[1]] %>% as_tibble()
projected %>%
ggplot(aes(x = V1, y = V2)) +
geom_point() +
ggtitle("without rounding")
# colour by parts of optimisation (static)
mult_releveled <- pca_v1 %>%
filter(info != "interpolation")
p1 <- pca_v1 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
geom_point(data = mult_releveled) +
theme(aspect.ratio = 1) +
ggtitle("PCA plot for v1")
# colour by parts of optimisation (static)
mult_releveled <- pca_v2 %>%
filter(info != "interpolation")
p2 <- pca_v2 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
geom_point(data = mult_releveled) +
theme(aspect.ratio = 1) +
ggtitle("PCA plot for v2")
p1 + p2 + plot_layout(guides = "collect")
mult_animate_v1 <- pca_v1 %>%
mutate(loop = ifelse(info == "start", 1, loop)) %>%
group_by(tries, loop, info3) %>%
mutate(animate_id = group_indices())
mult_animate_v2 <- pca_v2 %>%
mutate(loop = ifelse(info == "start", 1, loop)) %>%
group_by(tries, loop, info3) %>%
mutate(animate_id = group_indices())
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
mult_path_v1 <- mult_animate_v1 %>%
ungroup() %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point() +
theme(legend.position = "none")
mult_path_v2 <- mult_animate_v2 %>%
ungroup() %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point()
last_tries_v1 <- mult_animate_v1$tries[which.max(mult_animate_v1$tries)]
last_tries_v2 <- mult_animate_v2$tries[which.max(mult_animate_v2$tries)]
direction_v1 <- mult_animate_v1 %>%
filter(tries == last_tries_v1, info %in% c("direction_search", "best_direction_search")) %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point() +
xlim(1, max(mult_animate_v1$animate_id)) +
scale_color_manual(values = c(gg_color_hue(6)[2:3], gg_color_hue(6)[5:6])) +
theme(legend.position = "none")
direction_v2 <- mult_animate_v2 %>%
filter(tries == last_tries_v2, info %in% c("direction_search", "best_direction_search")) %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point() +
xlim(1, max(mult_animate_v2$animate_id)) +
scale_color_manual(values = c(gg_color_hue(6)[2:3], gg_color_hue(6)[5:6])) +
theme(legend.position = "none")
design <- "
13
24
24
"
direction_v1 + mult_path_v1 + direction_v2 + mult_path_v2 +
plot_layout(design = design, guides = "collect")
geo_animate <- mult_animate_v1 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
theme(aspect.ratio = 1,
legend.position = "none") +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p1 <- gganimate::animate(geo_animate, fps = 2)
index_val_animate <- mult_animate_v1 %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_jitter(width = 0.0005) +
theme(aspect.ratio = 1) +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p2 <- gganimate::animate(index_val_animate, fps = 2)
a_mgif <- image_read(p1)
b_mgif <- image_read(p2)
new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for (i in 2:100) {
combined <- image_append(c(a_mgif[i], b_mgif[i]))
new_gif <- c(new_gif, combined)
}
new_gif
geo_animate <- mult_animate_v2 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
theme(aspect.ratio = 1,
legend.position = "none") +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p1 <- gganimate::animate(geo_animate, fps = 2)
index_val_animate <- mult_animate_v2 %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_jitter(width = 0.0005) +
theme(aspect.ratio = 1) +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p2 <- gganimate::animate(index_val_animate, fps = 2)
a_mgif <- image_read(p1)
b_mgif <- image_read(p2)
new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for (i in 2:100) {
combined <- image_append(c(a_mgif[i], b_mgif[i]))
new_gif <- c(new_gif, combined)
}
new_gif